1 Summary

This index uses herring observed in predator stomachs to evaluate potential herring trends for the 2025 RT assessment.

Methods are based on

Will discuss a good starting point with the team, but for now submit the 1982 starting point with data through 2022.

2 Workflow

Operational updates to the forage index submitted to the State of the Ecosystem report are in the forageindex github repository: https://github.com/NOAA-EDAB/forageindex

Data input files are in the folder fhdat and were processed with the script VASTforage_ProcessInputDat.R in that folder: https://github.com/NOAA-EDAB/forageindex/blob/main/fhdat/VASTherringWG_ProcessInputDat.R

# Streamlined version of CreateVASTInputs.Rmd for operational updates to forage index indicators
# February 2024
#   This one uses only herring in stomachs, otherwise the same as SOE forage index
#   To be presented to the 2025 herring RT WG

library(tidyverse)
library(here)
library(dendextend)

# Load NEFSC stomach data received from Brian Smith

# object is called `allfh`
load(here("fhdat/allfh.Rdata"))

#object is called allfh21
load(here("fhdat/allfh21.Rdata"))

#object is called allfh22
load(here("fhdat/allfh22.Rdata"))

# bind all NEFSC stomach datasets
allfh <- allfh %>%
  dplyr::bind_rows(allfh21) |>
  dplyr::bind_rows(allfh22)

###############################################################################
# read predator similarity info to generate predator list
# Input NEFSC food habits overlap matrix:

dietoverlap <- read_csv(here("fhdat/tgmat.2022-02-15.csv"))

# use dendextend functions to get list
d_dietoverlap <- dist(dietoverlap)

guilds <- hclust(d_dietoverlap, method = "complete")

dend <- as.dendrogram(guilds)

dend <- color_branches(dend, k=6) # Brian uses 6 categories

labels(dend) <- paste(as.character(names(dietoverlap[-1]))[order.dendrogram(dend)],
                      "(",labels(dend),")", 
                      sep = "")

pisccomplete <- partition_leaves(dend)[[
  which_node(dend, c("Bluefish..S(37)", "Bluefish..M(36)", "Bluefish..L(35)"))
]]


# Filter NEFSC food habits data with predator list
pisccompletedf <- data.frame("COMNAME" = toupper(str_remove(pisccomplete, "\\..*")),
                             "SizeCat" = str_remove(str_extract(pisccomplete, "\\..*[:upper:]+"), "\\.."),
                             "feedguild" = "pisccomplete")

fh.nefsc.pisc.pisccomplete <- allfh %>%
  #filter(pynam != "EMPTY") %>%
  left_join(pisccompletedf, by = c("pdcomnam" = "COMNAME",
                                   "sizecat" = "SizeCat")) %>%
  filter(!is.na(feedguild))


##############################################################################
# Get prey list from NEFSC and NEAMAP
# Irrelevant! The prey list is now Atlantic herring and unid clupeids.

# preycount  <- fh.nefsc.pisc.pisccomplete %>%
#   #group_by(year, season, pdcomnam, pynam) %>%
#   group_by(pdcomnam, pynam) %>%
#   summarise(count = n()) %>%
#   #arrange(desc(count))
#   pivot_wider(names_from = pdcomnam, values_from = count)
# 
# 
# gencomlist <- allfh %>%
#   select(pynam, pycomnam2, gencom2) %>%
#   distinct()
# 
# NEFSCblueprey <- preycount %>%
#   #filter(BLUEFISH > 9) %>%
#   filter(!pynam %in% c("EMPTY", "BLOWN",
#                        "FISH", "OSTEICHTHYES",
#                        "ANIMAL REMAINS",
#                        "FISH SCALES")) %>%
#   #filter(!str_detect(pynam, "SHRIMP|CRAB")) %>%
#   left_join(gencomlist) %>%
#   filter(!gencom2 %in% c("ARTHROPODA", "ANNELIDA",
#                          "CNIDARIA", "UROCHORDATA",
#                          "ECHINODERMATA", "WORMS",
#                          "BRACHIOPODA", "COMB JELLIES",
#                          "BRYOZOA", "SPONGES",
#                          "MISCELLANEOUS", "OTHER")) %>%
#   arrange(desc(BLUEFISH))
# 
# # March 2023, formally add NEAMAP to prey decisions
# NEAMAPblueprey <- read.csv(here("fhdat/Full Prey List_Common Names.csv")) %>%
#   #filter(BLUEFISH > 9) %>%
#   filter(!SCIENTIFIC.NAME %in% c("Actinopterygii", "fish scales",
#                                  "Decapoda (megalope)", 
#                                  "unidentified material",
#                                  "Plantae",
#                                  "unidentified animal"))
# 
# NEAMAPprey <- NEAMAPblueprey %>%
#   dplyr::select(COMMON.NAME, SCIENTIFIC.NAME, BLUEFISH) %>%
#   dplyr::filter(!is.na(BLUEFISH)) %>%
#   dplyr::mutate(pynam2 = tolower(SCIENTIFIC.NAME),
#                 pynam2 = stringr::str_replace(pynam2, "spp.", "sp")) %>%
#   dplyr::rename(NEAMAP = BLUEFISH)
# 
# 
# NEFSCprey <- NEFSCblueprey %>%
#   dplyr::select(pycomnam2, pynam, BLUEFISH) %>%
#   dplyr::filter(!is.na(BLUEFISH)) %>%
#   dplyr::mutate(pynam2 = tolower(pynam)) %>%
#   dplyr::rename(NEFSC = BLUEFISH)
# 
# # new criteria March 2023, >20 observations NEAMAP+NEFSC, but keep mackerel
# # removes the flatfish order (too broad) and unid Urophycis previously in NEAMAP
# blueprey <- NEFSCprey %>% 
#   dplyr::full_join(NEAMAPprey) %>%
#   dplyr::mutate(NEAMAP = ifelse(is.na(NEAMAP), 0, NEAMAP),
#                 NEFSC = ifelse(is.na(NEFSC), 0, NEFSC),
#                 total = NEFSC + NEAMAP,
#                 PREY = ifelse(is.na(SCIENTIFIC.NAME), pynam, SCIENTIFIC.NAME),
#                 COMMON = ifelse(is.na(COMMON.NAME), pycomnam2, COMMON.NAME),
#                 pynam = ifelse(is.na(pynam), toupper(pynam2), pynam)) %>%
#   dplyr::arrange(desc(total)) %>%
#   dplyr::filter(total>20 | pynam=="SCOMBER SCOMBRUS") %>% # >20 leaves out mackerel
#   dplyr::mutate(COMMON = case_when(pynam=="ILLEX SP" ~ "Shortfin squids",
#                                    pynam2=="teuthida" ~ "Unidentified squids",
#                                    TRUE ~ COMMON)) %>%
#   dplyr::mutate(PREY = stringr::str_to_sentence(PREY),
#                 COMMON = stringr::str_to_sentence(COMMON))


fh.nefsc.pisc.pisccomplete.herring <- fh.nefsc.pisc.pisccomplete %>%
  mutate(herring = case_when(pynam %in% c("CLUPEA HARENGUS",
                                          "CLUPEIDAE",
                                          "CLUPEA HARENGUS SCALES",
                                          "CLUPEIDAE SCALES",
                                          "CLUPEA HARENGUS LARVAE",
                                          "CLUPEIDAE LARVAE")  ~ "herring",
                              TRUE ~ "othprey"))

###############################################################################
# Make the NEFSC dataset aggregating prey based on prey list

herringpyall_stn <- fh.nefsc.pisc.pisccomplete.herring %>%
  #create id linking cruise6_station
  #create season_ng spring and fall Spring=Jan-May, Fall=June-Dec
  mutate(id = paste0(cruise6, "_", station),
         year = as.numeric(year),
         month = as.numeric(month),
         season_ng = case_when(month <= 6 ~ "SPRING",
                               month >= 7 ~ "FALL",
                               TRUE ~ as.character(NA))
  ) %>%
  dplyr::select(year, season_ng, id, stratum,
                pynam, pyamtw, pywgti, pyvoli, herring, 
                pdcomnam, pdid, pdlen, pdsvol, pdswgt, 
                beglat, beglon, declat, declon, 
                bottemp, surftemp, setdepth) %>%
  group_by(id) %>%
  #mean blueprey g per stomach per tow: sum all blueprey g/n stomachs in tow
  mutate(herringwt = case_when(herring == "herring" ~ pyamtw,
                              TRUE ~ 0.0),
         herringpynam = case_when(herring == "herring" ~ pynam,
                               TRUE ~ NA_character_)) 

# Optional: save at prey disaggregated stage for paper
#saveRDS(bluepyall_stn, here("fhdat/bluepyall_stn.rds"))

# Now get station data in one line
stndat <- herringpyall_stn %>%
  dplyr::select(year, season_ng, id, 
                beglat, beglon, declat, declon, 
                bottemp, surftemp, setdepth) %>%
  distinct()

#pisc stomachs in tow count pdid for each pred and sum
piscstom <- herringpyall_stn %>%
  group_by(id, pdcomnam) %>%
  summarise(nstompd = n_distinct(pdid)) %>%
  group_by(id) %>%
  summarise(nstomtot = sum(nstompd))

#mean and var pred length per tow
pisclen <- herringpyall_stn %>%
  summarise(meanpisclen = mean(pdlen),
            varpisclen = var(pdlen))

# Aggregated prey at station level with predator covariates
herringpyagg_stn <- herringpyall_stn %>%
  summarise(sumherringwt = sum(herringwt),
            nherringsp = n_distinct(herringpynam, na.rm = T),
            npreysp = n_distinct(pynam),
            npiscsp = n_distinct(pdcomnam)) %>%
  left_join(piscstom) %>%
  mutate(meanherringwt = sumherringwt/nstomtot) %>%
  left_join(pisclen) %>%
  left_join(stndat)

# save at same stage as before, writing over old file
#saveRDS(bluepyagg_stn, here("fhdat/bluepyagg_stn.rds"))

# current dataset, fix declon, add vessel, rename NEFSC
#nefsc_bluepyagg_stn <- readRDS(here("fhdat/bluepyagg_stn.rds")) %>%
nefsc_herringpyagg_stn <- herringpyagg_stn %>%
  mutate(declon = -declon,
         vessel = case_when(year<2009 ~ "AL",
                            year>=2009 ~ "HB", 
                            TRUE ~ as.character(NA)))

##############################################################################
# Add NEAMAP to make full aggregated stomach dataset

# Read in NEAMAP updated input from Jim Gartland, reformat with same names
neamap_herringagg_stn <- read_csv(here("fhdat/NEAMAP_Mean stomach weights_Bluefish Prey_wWQ_AH Only.csv")) %>%  
  mutate(vessel = "NEAMAP") %>%
  rename(id = station,
         sumherringwt = sumbluepreywt,
         nherringsp = nbfpreyspp,
         #npreysp = ,
         npiscsp = npiscspp,
         nstomtot = nstomtot, 
         meanherringwt = meanbluepreywt,
         meanpisclen = meanpisclen.simple, 
         #varpisclen = ,
         season_ng = season,
         declat  = lat,
         declon = lon,
         bottemp = bWT,
         #surftemp = , 
         setdepth = depthm) 


# combine NEAMAP and NEFSC
herringagg_stn_all <-  nefsc_herringpyagg_stn %>%
  bind_rows(neamap_herringagg_stn) 

# Save before SST integration step
#saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))

###############################################################################
# Add SST into NEAMAP and reintegrate into full dataset

# Read back in if needed for SST
#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

NEFSCstations <- allfh %>%
  dplyr::mutate(id = paste0(cruise6, "_", station),
                year = as.numeric(year),
                month = as.numeric(month),
                day = as.numeric(day),
                declon = -declon) %>%
  dplyr::select(id, year, month, day, declat, declon) %>%
  dplyr::distinct()

# Need NEAMAP SST update! This is the old file
NEAMAPstationSST <- read.csv(here("fhdat/NEAMAP SST_2007_2022_Nov2023.csv"))

NEAMAPstations <- NEAMAPstationSST %>%
  dplyr::mutate(id = station,
                year = as.numeric(year),
                month = as.numeric(month),
                day = as.numeric(day)) %>%
  dplyr::select(id, year, month, day) %>%
  dplyr::distinct()

# remake diethauls
diethauls <- herringagg_stn_all %>%
  dplyr::select(id, declat, declon)

NEFSCstations <- dplyr::select(NEFSCstations, c(-declat, -declon))

Allstations <- bind_rows(NEFSCstations, NEAMAPstations)

#station id, lat lon, year month day
diethauls <- left_join(diethauls, Allstations)

#add year month day to diet data
herringagg_stn_all <- left_join(herringagg_stn_all, diethauls)

# add NEAMAP SST to surftemp field
NEAMAPidSST <- NEAMAPstationSST %>%
  mutate(id = station) %>%
  dplyr::select(id, SST)

herringagg_stn_all <- left_join(herringagg_stn_all, NEAMAPidSST, by="id") %>%
  mutate(surftemp = coalesce(surftemp, SST)) %>%
  dplyr::select(-SST)

# save merged dataset with day, month, and NEAMAP surftemp, same name
#saveRDS(bluepyagg_stn_all, here("fhdat/bluepyagg_stn_all.rds"))

###############################################################################
#Now match stations to OISST

#make an SST dataframe for 2022! Add to saved sst_data in data-raw/gridded

library(sf)
library(raster)
library(terra)
library(nngeo)

# already have all OISST data as dataframes on this repo

# # Bastille function from https://github.com/kimberly-bastille/ecopull/blob/main/R/utils.R
# 
# nc_to_raster <- function(nc,
#                          varname,
#                          extent = c(0, 360, -90, 90),
#                          crop = raster::extent(280, 300, 30, 50),
#                          show_images = FALSE) {
#   
#   message("Reading .nc as brick...")
#   
#   r <- raster::brick(nc, varname = varname)
#   
#   message("Setting CRS...")
#   raster::crs(r) <- "+proj=longlat +lat_1=35 +lat_2=45 +lat_0=40 +lon_0=-77 +x_0=0 +y_0=0 +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
#   
#   # not sure if this is necessary?
#   raster::extent(r) <- raster::extent(extent)
#   
#   if(show_images){
#     par(mfrow = c(1,2))
#     raster::plot(r, 1, sub = "Full dataset")
#   }
#   
#   message("Cropping data...")
#   ne_data <- raster::crop(r, crop)
#   #ne_data <- raster::rotate(ne_data) add here for future pulls
#   
#   if(show_images){
#     raster::plot(ne_data, 1, sub = "Cropped dataset")
#     par(mfrow = c(1,1))
#   }
#   
#   message("Done!")
#   
#   return(ne_data)
# }
# 
# # function to convert to dataframe based on
# # https://towardsdatascience.com/transforming-spatial-data-to-tabular-data-in-r-4dab139f311f
# 
# raster_to_sstdf <- function(brick,
#                             rotate=TRUE){
#   
#   if(rotate) brick_r <- raster::rotate(brick)
#   brick_r <- raster::crop(brick_r, raster::extent(-77,-65,35,45))
#   sstdf <- as.data.frame(raster::rasterToPoints(brick_r, spatial = TRUE))
#   sstdf <- sstdf %>%
#     dplyr::rename(Lon = x,
#                   Lat = y) %>%
#     tidyr::pivot_longer(cols = starts_with("X"),
#                         names_to = c("year", "month", "day"),
#                         names_prefix = "X",
#                         names_sep = "\\.",
#                         values_to = "sst",
#     )
#   return(sstdf)
# }
# 
# # pull the OISST data as raster brick, modified from 
# # https://github.com/kimberly-bastille/ecopull/blob/main/.github/workflows/pull_satellite_data.yml
# 
# varname <- "sst"
# 
# # 1985-2021 previously pulled, processed and stored. add 2022.
# # add 1981-1984 to extend back in time. No OISST before 1981.
# # 1981 is only Sept-Dec so don't use
# 
# years <- #1982:1984 # 2022
# for(i in years) {
#   name <- paste0(i, ".nc")
#   dir.create(here::here("data-raw","gridded", "sst_data"), recursive = TRUE)
#   filename <- here::here("data-raw","gridded", "sst_data", paste0("test_", i, ".grd"))
#   url <- paste0("https://downloads.psl.noaa.gov/Datasets/noaa.oisst.v2.highres/sst.day.mean.", i, ".nc")
#   download.file(url, destfile = name)
#   
#   text <- knitr::knit_expand(text = "test_{{year}} <- nc_to_raster(nc = name, varname = varname)
#                                      raster::writeRaster(test_{{year}}, filename = filename, overwrite=TRUE)",
#                              year = i)
#   print(text)
#   try(eval(parse(text = text)))
#   unlink(name) # remove nc file to save space
#   print(paste("finished",i))
# }
# 
# 
# # convert raster to dataframe
# #years <- 2022
# for(i in years) {
#   name <- get(paste0("test_",i))
#   filename <- here::here("data-raw","gridded", "sst_data", paste0("sst", i, ".rds"))
#   text <- knitr::knit_expand(text = "sst{{year}} <- raster_to_sstdf(brick = name)
#                                      saveRDS(sst{{year}}, filename)",
#                              year = i)
#   print(text)
#   try(eval(parse(text = text)))
#}
#read in diet data with month day fields

#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))

stations <- herringagg_stn_all %>%
  dplyr::mutate(day = str_pad(day, 2, pad='0'),
                month = str_pad(month, 2, pad='0'),
                yrmody = as.numeric(paste0(year, month, day))) %>%
  dplyr::select(id, declon, declat, year, yrmody) %>%
  na.omit() %>%
  sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE) 



#list of SST dataframes
SSTdfs <- list.files(here("data-raw/gridded/sst_data/"), pattern = "*.rds")

dietstn_OISST <- tibble()


for(df in SSTdfs){
  sstdf <- readRDS(paste0(here("data-raw/gridded/sst_data/", df)))
  
  # keep only bluefish dates in SST year
  stationsyr <- stations %>%
    filter(year == unique(sstdf$year))
  
  # keep only sst days in bluefish dataset
  sstdf_survdays <- sstdf %>%
    dplyr::mutate(yrmody = as.numeric(paste0(year, month, day)) )%>%
    dplyr::filter(yrmody %in% unique(stationsyr$yrmody)) %>%
    dplyr::mutate(year = as.numeric(year),
                  month = as.numeric(month),
                  day = as.numeric(day),
                  declon = Lon,
                  declat = Lat) %>%
    dplyr::select(-Lon, -Lat) %>%
    sf::st_as_sf(coords=c("declon","declat"), crs=4326, remove=FALSE)  
  
  # now join by nearest neighbor and date
  
  #https://stackoverflow.com/questions/71959927/spatial-join-two-data-frames-by-nearest-feature-and-date-in-r      
  
  yrdietOISST <- do.call('rbind', lapply(split(stationsyr, 1:nrow(stationsyr)), function(x) {
    sf::st_join(x, sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),],
                #join = st_nearest_feature
                join = st_nn, k = 1, progress = FALSE
    )
  }))
  
  #   #datatable solution--works but doesnt seem faster?
  #    df1 <- data.table(stationsyr)
  #   
  #  .nearest_samedate <- function(x) {
  #    st_join(st_as_sf(x), sstdf_survdays[sstdf_survdays$yrmody == unique(x$yrmody),], join = st_nearest_feature)
  #  }
  # # 
  #  yrdietOISST <- df1[, .nearest_samedate(.SD), by = list(1:nrow(df1))]
  
  dietstn_OISST <- rbind(dietstn_OISST, yrdietOISST)
  
}

#saveRDS(dietstn_OISST, here("data-raw/dietstn_OISST.rds"))

# Now join with OISST dataset

#bluepyagg_stn_all <- readRDS(here("fhdat/bluepyagg_stn_all.rds"))
#dietstn_OISST <- readRDS(here("data-raw/dietstn_OISST.rds"))


dietstn_OISST_merge <- dietstn_OISST %>%
  dplyr::rename(declon = declon.x,
                declat = declat.x,
                year = year.x,
                oisst = sst) %>%
  dplyr::select(id, oisst) %>%
  sf::st_drop_geometry()

herringagg_stn_all_OISST <- left_join(herringagg_stn_all, dietstn_OISST_merge)

saveRDS(herringagg_stn_all_OISST, here("fhdat/herringagg_stn_all_OISST_1982-2022.rds"))

VAST models were run using the script VASTunivariate_seasonalforageindex_operational.R in the folder VASTscripts: https://github.com/NOAA-EDAB/forageindex/blob/main/VASTscripts/VASTunivariate_seasonalherringindex.R

# This is the exact VAST code used in Gaichas et al 2023, but with data years 
# extended from 1973-2022
# VAST attempt 2 univariate model as a script
# modified from https://github.com/James-Thorson-NOAA/VAST/wiki/Index-standardization

# Load packages
library(here)
library(dplyr)
library(VAST)

#Read in data, separate spring and fall, and rename columns for VAST:

# this dataset created in SSTmethods.Rmd

herringagg_stn <- readRDS(here::here("fhdat/herringagg_stn_all_OISST_1982-2022.rds"))

# make SST column that uses surftemp unless missing or 0
# there are 3 surftemp 0 values in the dataset, all with oisst > 15
herringagg_stn <- herringagg_stn %>%
  dplyr::mutate(sstfill = ifelse((is.na(surftemp)|surftemp==0), oisst, surftemp))

#herringagg_stn <- bluepyagg_pred_stn

# filter to assessment years at Tony's suggestion

# code Vessel as AL=0, HB=1, NEAMAP=2

herringagg_stn_fall <- herringagg_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanherringwt, #use bluepywt for individuals input in example
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()

herringagg_stn_spring <- herringagg_stn %>%
  filter(season_ng == "SPRING") |>
        #,year > 1984) %>%
  mutate(AreaSwept_km2 =1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = as.numeric(as.factor(vessel))-1
         ) %>% 
  dplyr::select(Catch_g = meanherringwt,
         Year = year,
         Vessel,
         AreaSwept_km2,
         Lat = declat,
         Lon = declon,
         meanpisclen,
         npiscsp,
         #bottemp, #this leaves out many stations for NEFSC
         #surftemp, #this leaves out many stations for NEFSC
         #oisst, #leaves out everything before 1982
         sstfill
         ) %>%
  na.omit() %>%
  as.data.frame()


# Make settings (turning off bias.correct to save time for example)
# NEFSC strata limits https://github.com/James-Thorson-NOAA/VAST/issues/302

# use only MAB, GB, GOM, SS EPUs 
# leave out south of Cape Hatteras at Elizabeth's suggestion
# could also leave out SS?
# CHECK if these EPUs match what we use in SOE

MAB <- c(1010:1080, 1100:1120, 1600:1750, 3010:3450, 3470, 3500, 3510)
GB  <- c(1090, 1130:1210, 1230, 1250, 3460, 3480, 3490, 3520:3550)
GOM <- c(1220, 1240, 1260:1290, 1360:1400, 3560:3830)
SS  <- c(1300:1352, 3840:3990)

coast3nmbuffst <- readRDS(here::here("spatialdat/coast3nmbuffst.rds"))

# Mid Atlantic
MAB2 <- coast3nmbuffst %>% 
  dplyr::filter(stratum_number %in% MAB) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# Georges Bank EPU
GB2 <- coast3nmbuffst %>% 
  dplyr::filter(stratum_number %in% GB) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# gulf of maine EPU (for SOE)
GOM2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% GOM) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# scotian shelf EPU (for SOE)
SS2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% SS) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()


# needed to cover the whole northwest atlantic grid
allother2 <- coast3nmbuffst %>%
  dplyr::filter(!stratum_number %in% c(MAB, GB, GOM, SS)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# all epus
allEPU2 <- coast3nmbuffst %>%
  dplyr::filter(stratum_number %in% c(MAB, GB, GOM, SS)) %>%
  dplyr::select(stratum_number2) %>%
  dplyr::distinct()

# # configs same FieldConfig as below but formatted differently
# FieldConfig <- c(
#   "Omega1"   = 0,   # number of spatial variation factors (0, 1, AR1)
#   "Epsilon1" = 0,   # number of spatio-temporal factors
#   "Omega2"   = 0, 
#   "Epsilon2" = 0
# ) 

# default configs, not really specified anyway
FieldConfig = matrix( "IID", ncol=2, nrow=3, 
                      dimnames=list(c("Omega","Epsilon","Beta"),c("Component_1","Component_2")))


RhoConfig <- c(
  "Beta1" = 0,      # temporal structure on years (intercepts) 
  "Beta2" = 0, 
  "Epsilon1" = 0,   # temporal structure on spatio-temporal variation
  "Epsilon2" = 0
) 
# 0 off (fixed effects)
# 1 independent
# 2 random walk
# 3 constant among years (fixed effect)
# 4 AR1

OverdispersionConfig    <- c("eta1"=0, "eta2"=0)
# eta1 = vessel effects on prey encounter rate
# eta2 = vessel effects on prey weight

strata.limits <- as.list(c("AllEPU" = allEPU2, 
                           "MAB" = MAB2,
                           "GB" = GB2,
                           "GOM" = GOM2,
                           "allother" = allother2))

settings = make_settings( n_x = 500, 
                          Region = "northwest_atlantic",
                          Version = "VAST_v14_0_1", #needed to prevent error from newer dev version number
                          #strata.limits = list('All_areas' = 1:1e5), full area
                          strata.limits = strata.limits,
                          purpose = "index2", 
                          bias.correct = TRUE,
                          #use_anisotropy = FALSE,
                          #fine_scale = FALSE,
                          #FieldConfig = FieldConfig,
                          #RhoConfig = RhoConfig,
                          OverdispersionConfig = OverdispersionConfig
                          )


New_Extrapolation_List <- readRDS(here::here("spatialdat/CustomExtrapolationList.rds"))

# select dataset and set directory for output

#########################################################
# Run model fall

season <- c("fall_500_lennosst_ALLsplit_biascorrect")

working_dir <- here::here(sprintf("herringpyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}

# subset for faster testing
#herringagg_stn_fall <- herringagg_stn_fall %>% filter(Year<1990)                       

fit <- fit_model(
  settings = settings, 
  extrapolation_list = New_Extrapolation_List,
  Lat_i = herringagg_stn_fall$Lat, 
  Lon_i = herringagg_stn_fall$Lon, 
  t_i = herringagg_stn_fall$Year, 
  b_i = as_units(herringagg_stn_fall[,'Catch_g'], 'g'),
  a_i = rep(1, nrow(herringagg_stn_fall)),
  v_i = herringagg_stn_fall$Vessel,
  Q_ik = as.matrix(herringagg_stn_fall[,c("npiscsp", 
                                         "meanpisclen", 
                                         "sstfill"
                                         )]),
  #Use_REML = TRUE,
  working_dir = paste0(working_dir, "/"))

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/"))

######################################################
# Run model spring

season <- c("spring_500_lennosst_ALLsplit_biascorrect")

working_dir <- here::here(sprintf("herringpyindex/allagg_%s/", season))

if(!dir.exists(working_dir)) {
  dir.create(working_dir)
}                         
  
# subset for faster testing
#herringagg_stn_spring <- herringagg_stn_spring %>% filter(Year<1990) 

fit <- fit_model( settings = settings,  
                 extrapolation_list = New_Extrapolation_List,
                 Lat_i = herringagg_stn_spring[,'Lat'], 
                 Lon_i = herringagg_stn_spring[,'Lon'], 
                 t_i = herringagg_stn_spring[,'Year'], 
                 b_i = as_units(herringagg_stn_spring[,'Catch_g'], 'g'), 
                 a_i = rep(1, nrow(herringagg_stn_spring)),
                 v_i = herringagg_stn_spring$Vessel,
                 Q_ik = as.matrix(herringagg_stn_spring[,c("npiscsp", 
                                                          "meanpisclen", 
                                                          "sstfill"
                                                          )]),
                # Use_REML = TRUE,
                 working_dir = paste0(working_dir, "/"))

saveRDS(fit, file = paste0(working_dir, "/fit.rds"))

# Plot results
plot( fit,
      working_dir = paste0(working_dir, "/")) 

3 Results

3.1 Fall model: failed

Probably not enough data in fall.

List of estimated fixed and random effects:
    Coefficient_name Number_of_coefficients   Type
1           beta1_ft                     48  Fixed
2           beta2_ft                     48  Fixed
3       L_epsilon1_z                      1  Fixed
4       L_epsilon2_z                      1  Fixed
5         L_omega1_z                      1  Fixed
6         L_omega2_z                      1  Fixed
7          lambda1_k                      3  Fixed
8          lambda2_k                      3  Fixed
9         ln_H_input                      2  Fixed
10         logkappa1                      1  Fixed
11         logkappa2                      1  Fixed
12         logSigmaM                      1  Fixed
13 Epsiloninput1_sff                  27750 Random
14 Epsiloninput2_sff                  27750 Random
15    Omegainput1_sf                    555 Random
16    Omegainput2_sf                    555 Random

### Checking model at initial values
All fixed effects have a nonzero gradient

... After 167 iterations...

The following parameters appear to be approaching zero:
          Param starting_value Lower           MLE Upper final_gradient
55 L_epsilon1_z              1  -Inf -8.171138e-07   Inf  -7.579271e-05
Please turn off factor-model variance parameters `L_` that are approaching zero and re-run the model

Error: Please change model structure to avoid problems with parameter estimates and then re-try; see details in `?check_fit`
In addition: Warning message:
In nlminb(start = startpar, objective = fn, gradient = gr, control = nlminb.control,  :
  NA/NaN function evaluation

Trying spring model

3.2 Spring results

Ran. Check diagnostics.

SOEinputs <- function(infile, season, outfile) {
  
  splitoutput <- read.csv(infile)
  
  # warning, hardcoded. obviously
  stratlook <- data.frame(Stratum = c("Stratum_1",
                                      "Stratum_2",
                                      "Stratum_3",
                                      "Stratum_4",
                                      "Stratum_5",
                                      "Stratum_6",
                                      "Stratum_7",
                                      "Stratum_8",
                                      "Stratum_9",
                                      "Stratum_10",
                                      "Stratum_11",
                                      "Stratum_12",
                                      "Stratum_13",
                                      "Stratum_14",
                                      "Stratum_15"),
                          Region  = c("AllEPU", 
                                      "MABGB", 
                                      "MABGBstate", 
                                      "MABGBfed", 
                                      "MAB",
                                      "GB",
                                      "GOM",
                                      "bfall",
                                      "bfin",
                                      "bfoff",
                                      "MABGBalbinshore",
                                      "MABGBothoffshore",
                                      "albbfin",
                                      "albbfall",
                                      "allother"))
  
  forageindex <- splitoutput %>%
    left_join(stratlook) %>%
    dplyr::select(Time, 
                  EPU = Region, 
                  "Forage Fish Biomass Estimate" = Estimate, 
                  "Forage Fish Biomass Estimate SE" = Std..Error.for.Estimate) %>%
    tidyr::pivot_longer(c("Forage Fish Biomass Estimate", "Forage Fish Biomass Estimate SE"), 
                        names_to = "Var", values_to = "Value") %>%
    dplyr::filter(EPU %in% c("MAB", "GB", "GOM", "AllEPU")) %>%
    dplyr::mutate(Units = "relative grams per stomach") %>%
    dplyr::select(Time, Var, Value, EPU, Units)
  
  forageindex$Var <- stringr::str_c(season, forageindex$Var, sep = " ")
  
  #readr::write_csv(forageindex, outfile)
  saveRDS(forageindex, outfile)
  
} 


# make data files

SOEinputs(infile = here::here("herringpyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/Index.csv"),
          season = "Spring", 
           outfile = here::here("herringpyindex/springherringindex.rds"))


forageindex <- readRDS(here::here("herringpyindex/springherringindex.rds"))

# test plot
foragewide <- forageindex %>%
  pivot_wider(names_from = Var, values_from = Value)


ggplot(foragewide, aes(x=Time, y=`Spring Forage Fish Biomass Estimate`, colour = EPU)) +
  geom_errorbar(aes(ymin=`Spring Forage Fish Biomass Estimate`+`Spring Forage Fish Biomass Estimate SE`,
                    ymax=`Spring Forage Fish Biomass Estimate`-`Spring Forage Fish Biomass Estimate SE`))+
  geom_point()+
  geom_line()

knitr::include_graphics(here::here("herringpyindex/allagg_spring_500_lennosst_ALLsplit_biascorrect/ln_density-predicted.png"))